** Data reading and variable selection from raw data
** Japanese Social Stratification and Mobility 2005


** 01. Reading data **
** Closes any open log, clears memory, converts the raw file 0764.dta from
** Shift_JIS with -unicode translate-, then loads it and saves it back under
** the same file name.
// capture: do not stop with an error when no log is currently open
cap log close
clear all
set more off
// TODO: fill in the working directory (presumably the folder holding 0764.dta)
cd /*insert you work directory here*/
// the source encoding has to be declared before -unicode translate- is called
unicode encoding set "Shift_JIS" 
unicode translate 0764.dta
use 0764.dta
save 0764.dta, replace

// second working-directory placeholder; must also be filled in before running
cd /*insert you work directory here*/

** 02. Constructing year and country variables **

* Both variables are constants: every record gets the same value.
generate year = 2005
generate country = 392

label variable year "survey year"
label variable country "ISO country code"
* 392 is the ISO code used here for Japan (see the file "ISO Country Codes.pdf")


** 03. ID variables **

* Person identifier: a straight copy of the raw variable ID.
generate pid = ID
label variable pid "person id"


** 04. Basic Demographics (Sex and Age/birth year) **

* Sex: copied from q01_1 and labelled 1 = male, 2 = female.
generate sex = q01_1
label variable sex "sex"
label define sex 1 "male"
label define sex 2 "female", add
label values sex sex

* Age: copied from q01_2a.
generate age = q01_2a
label variable age "age"

* Birth year is derived as survey year minus age.
generate birthyr = year - age
label variable birthyr "year of birth"


** 05. Siblings **
** Builds the total number of siblings, the number of brothers and sisters,
** and the birth order from the q09 items.

* Total: q09f minus one. The raw code 99 therefore becomes 98, which is the
* value labelled as don't know/not applicable below.
ge nsibs=q09f-1
lab var nsibs "number of siblings in total"
lab def nsibs 98 "don't know/not applicable"
lab val nsibs nsibs

* Copy each sibling count into a new variable, moving the raw code 9 to 999
* so that it is clearly separated from valid counts. generate() is the
* documented recode option for creating the new variable.
* FIX: youngersis was previously built from q09b, the same item as oldersis,
* so older sisters were counted twice in nsis.
* NOTE(review): assumes q09d is the younger-sisters item - confirm in codebook.
recode q09a (1=1)(2=2)(3=3)(4=4)(5=5)(6=6)(7=7)(8=8)(9=999 "don't know/not applicable"), generate(olderbro)
recode q09c (1=1)(2=2)(3=3)(4=4)(5=5)(6=6)(7=7)(8=8)(9=999 "don't know/not applicable"), generate(youngerbro)
recode q09b (1=1)(2=2)(3=3)(4=4)(5=5)(6=6)(7=7)(8=8)(9=999 "don't know/not applicable"), generate(oldersis)
recode q09d (1=1)(2=2)(3=3)(4=4)(5=5)(6=6)(7=7)(8=8)(9=999 "don't know/not applicable"), generate(youngersis)

* Sums involving a 999 component end up at 999 or above; only the sums listed
* in each label definition carry a value label.
ge nbro=olderbro+youngerbro
lab var nbro "number of brothers"
lab def nbro 1000 "don't know/not applicable" 1998 "don't know/not applicable"
lab val nbro nbro

ge nsis=oldersis+youngersis
lab var nsis "number of sisters"
lab def nsis 1998 "don't know/not applicable"
lab val nsis nsis

* Birth order: number of older siblings plus one for the respondent.
ge birthorder=olderbro+oldersis+1
lab var birthorder "birth order"
lab def birthorder 1001 "don't know/not applicable" 1002 "don't know/not applicable" 1999 "don't know/not applicable"
lab val birthorder birthorder


** 06. Own education **

* Highest education obtained: copied from ed_ssm, then labelled.
generate educ = ed_ssm
label variable educ "highest education obtained"

label define educ ///
    1 "elemantary school" ///
    2 "high school" ///
    3 "college of technology" ///
    4 "2-year college/y" ///
    5 "college/university" ///
    6 "graduate school" ///
    8 "no degree/qualifacation" ///
    9 "don't know"
label values educ educ

** 07. Parents' education: Father and/or Mother **

* Highest education obtained: father from q21_1, mother from q21_2.
generate faeduc = q21_1
generate moeduc = q21_2
label variable faeduc "father's education"
label variable moeduc "mother's education"

* Both parents use the same coding scheme. The mapping is written out once
* for the father and cloned under the mother's label name.
label define faeduc ///
    1  "ordinary elementary_old system" ///
    2  "higher elementary_old system" ///
    3  "junior high/girls high_old system" ///
    4  "vocational_old system" ///
    5  "normal_old system" ///
    6  "higher or vocational_old system/higher normal" ///
    7  "university/graduate_old system" ///
    8  "junior high" ///
    9  "high" ///
    10 "2-year college/College of technology" ///
    11 "university" ///
    12 "graduate school" ///
    13 "don't know" ///
    99 "no answer"
label copy faeduc moeduc

label values faeduc faeduc
label values moeduc moeduc


** 08. Own occupation **
** The value labels defined in this section (empstat, occ_ISIC, occ_ISCO,
** copsize, title) are attached again to the parental variables in section 09.

* Current employment status (q02a).
generate empstat = q02a
label variable empstat "current employment status"
label define empstat ///
    1  "employer/excutive" ///
    2  "regular employee" ///
    3  "temporary employee/part-time worker" ///
    4  "dispacthed employee" ///
    5  "contract worker" ///
    6  "self-employeed/freelance" ///
    7  "family worker" ///
    8  "internal job" ///
    9  "unemployeed: looking for jobs now" ///
    10 "unemployeed: not looking for jobs now" ///
    11 "student" ///
    99 "don't know/no answer"
label values empstat empstat

* Industry (q02bi); only the special codes receive value labels.
generate occ_ISIC = q02bi
label variable occ_ISIC "industry (ISIC code)"
label define occ_ISIC ///
    99998 "not available" ///
    99999 "don't know/no answer"
label values occ_ISIC occ_ISIC

* Occupation (q02di); only the special codes receive value labels.
generate occ_ISCO = q02di
label variable occ_ISCO "occupation/work content (ISCO code)"
label define occ_ISCO ///
    0     "not classifiable" ///
    99998 "not available" ///
    99999 "don't know/no answer"
label values occ_ISCO occ_ISCO

* Firm size (q02c).
generate copsize = q02c
label variable copsize "the number of employees of the firm"
label define copsize ///
    1  "1 person" ///
    2  "2-4 people" ///
    3  "5-9 people" ///
    4  "10-29 people" ///
    5  "30-99 people" ///
    6  "100-299 people" ///
    7  "300-499 people" ///
    8  "500-999 people" ///
    9  "above 1000 people" ///
    10 "government sector" ///
    98 "not applicable" ///
    99 "don't know/no answer"
label values copsize copsize

* Job title (q02e).
generate title = q02e
label variable title "job title/managing responsiblities"
label define title ///
    1 "no title" ///
    2 "Group leader, Foreman" ///
    3 "Sub-section Head agency or equivalent - kakaricho" ///
    4 "Section Head, Manager or equivalent - kacho" ///
    5 "Department Head, General Manager or equivalent - bucho" ///
    6 "Director, Executive Head - shacho" ///
    8 "not applicable" ///
    9 "don't know/not available"
label values title title

** 09. Parents' occupation **

* Father's occupation when the respondent was 15 (items q23_1a to q23_1e).
* Step 1: copy the raw items.
generate faempstat15  = q23_1a
generate faocc15_ISIC = q23_1bi
generate faocc15_ISCO = q23_1di
generate facopsize15  = q23_1c
generate fatitle15    = q23_1e

* Step 2: variable labels.
label variable faempstat15  "father's employment status when respondent 15"
label variable faocc15_ISIC "father's industry (ISIC code) when respondent 15"
label variable faocc15_ISCO "father's occupation/work content (ISCO code) when respondent 15"
label variable facopsize15  "father: the number of employees of the firm when respondent 15"
label variable fatitle15    "father's job title/managing responsiblities when respondent 15"

* Step 3: attach the value labels already defined for the respondent's own job.
label values faempstat15  empstat
label values faocc15_ISIC occ_ISIC
label values faocc15_ISCO occ_ISCO
label values facopsize15  copsize
label values fatitle15    title

//mothers occupation when respondent 15
// Copies q23_3 into mowork15 and labels its four answer codes.
ge mowork15=q23_3
lab var mowork15 "whether mothe was working when respondent 15"
lab def mowork15 1 "yes" 2 "no" 3 "no mother then" 4 "don't know/no answer"
// FIX: the label set defined above is called mowork15. The previous line
// attached a label named mowork, which is never defined, so mowork15 was
// left without value labels.
lab val mowork15 mowork15

//father current occupation
* ISIC and ISCO codes: refer to the codebook.
* Raw items q23_2a to q23_2e are copied into fa-prefixed variables.
generate faempstat  = q23_2a
generate faocc_ISIC = q23_2bi
generate faocc_ISCO = q23_2di
generate facopsize  = q23_2c
generate fatitle    = q23_2e

label variable faempstat  "father's current employment status"
label variable faocc_ISIC "father's industry (ISIC code)"
label variable faocc_ISCO "father's occupation/work content (ISCO code)"
label variable facopsize  "father: the number of employees of the firm"
label variable fatitle    "father's job title/managing responsiblities"

* Each father variable is named fa + the respondent's variable name, and it
* reuses the value label of that respondent variable.
foreach stem in empstat occ_ISIC occ_ISCO copsize title {
    label values fa`stem' `stem'
}

//mother current occupation
* ISIC and ISCO codes: refer to the codebook.
* Raw items q23_3a to q23_3e are copied into mo-prefixed variables.
* NOTE(review): these items share the q23_3 stem that is used for the
* "mother working when respondent 15" variable - confirm in the codebook that
* they describe the mother's current job, as the labels below state.
generate moempstat  = q23_3a
generate moocc_ISIC = q23_3bi
generate moocc_ISCO = q23_3di
generate mocopsize  = q23_3c
generate motitle    = q23_3e

label variable moempstat  "mother's current employment status"
label variable moocc_ISIC "mother's industry (ISIC code)"
label variable moocc_ISCO "mother's occupation/work content (ISCO code)"
label variable mocopsize  "mother: the number of employees of the firm"
label variable motitle    "mother's job title/managing responsiblities"

* Each mother variable is named mo + the respondent's variable name, and it
* reuses the value label of that respondent variable.
foreach stem in empstat occ_ISIC occ_ISCO copsize title {
    label values mo`stem' `stem'
}


** 10. Tabulate the Identified Variables **
** Opens a text log, prints descriptive output for every constructed
** variable, and closes the log again.

* TODO: replace the placeholder comment with the log file path.
log using /*insert you work directory here*/, replace text

** Data reading and variable selection from raw data
** 1/03/2017
** Japanese Social Stratification and Mobility 2005

** Sex **
tabulate sex

** Age, Birth Year **
summarize age birthyr, detail

** Siblings **
summarize nsibs nbro nsis birthorder, detail

** R's Own Education **
tab1 educ

** Parental Education **
tab1 faeduc moeduc

** R's Own Occupation **
tab1 empstat occ_ISIC occ_ISCO copsize title

** Parental Occupation **
* father and mother when the respondent was 15
tab1 faempstat15 faocc15_ISIC faocc15_ISCO facopsize15 fatitle15 mowork15
* father, then mother, current
tab1 faempstat faocc_ISIC faocc_ISCO facopsize fatitle ///
     moempstat moocc_ISIC moocc_ISCO mocopsize motitle


log close

** 11. Keep the identified variables only
** Everything not listed here (raw items and the intermediate older/younger
** sibling counts) is dropped from the data in memory.

keep ///
    year country pid ///
    sex age birthyr ///
    nbro nsis nsibs birthorder ///
    educ faeduc moeduc ///
    empstat occ_ISIC occ_ISCO copsize title ///
    faempstat15 faocc15_ISIC faocc15_ISCO facopsize15 fatitle15 mowork15 ///
    faempstat faocc_ISIC faocc_ISCO facopsize fatitle ///
    moempstat moocc_ISIC moocc_ISCO mocopsize motitle


** 12. Save the Data File **

* TODO: replace the placeholder comment with the output file path.
saveold /*insert you work directory here*/, replace



** 13. Homogenising education **
** Own Education **
** The categorical variable is kept under the name educ_cat; two numeric
** versions (years of schooling and an ISCED code) are derived from it.
rename educ educ_cat

* Years of schooling. Categories without a rule, including 9, stay missing.
generate educ_yrs = .
replace educ_yrs = 0  if educ_cat == 8
replace educ_yrs = 6  if educ_cat == 1
replace educ_yrs = 12 if educ_cat == 2
replace educ_yrs = 14 if inlist(educ_cat, 3, 4)
replace educ_yrs = 16 if educ_cat == 5
replace educ_yrs = 20 if educ_cat == 6
label variable educ_yrs "respondent highest education in years"

* ISCED code. Categories without a rule, including 9, stay missing.
* Category 8 is stored as the number 20.
generate educ_ISCED = .
replace educ_ISCED = 20  if educ_cat == 8
replace educ_ISCED = 100 if educ_cat == 1
replace educ_ISCED = 344 if educ_cat == 2
replace educ_ISCED = 500 if inlist(educ_cat, 3, 4)
replace educ_ISCED = 665 if educ_cat == 5
replace educ_ISCED = 767 if educ_cat == 6
replace educ_ISCED = 864 if educ_cat == 7
label variable educ_ISCED "respondent highest education in ISCED code"


** Parents Education **
** The categorical variables are renamed to faeduc_cat / maeduc_cat, and
** years of schooling and ISCED codes are derived from them. The mapping is
** the same for both parents, so it is written once and looped over the
** prefixes fa (father) and ma (mother).

generate faeduc_flag = 1

rename faeduc faeduc_cat
rename moeduc maeduc_cat

* Years of schooling. Categories 13 and 99 have no rule and stay missing.
foreach p in fa ma {
    generate `p'educ_yrs = 0 if `p'educ_cat == 0
    replace `p'educ_yrs = 6  if `p'educ_cat == 1
    replace `p'educ_yrs = 8  if `p'educ_cat == 2
    replace `p'educ_yrs = 9  if `p'educ_cat == 8
    replace `p'educ_yrs = 10 if `p'educ_cat == 3
    replace `p'educ_yrs = 11 if `p'educ_cat == 4
    replace `p'educ_yrs = 12 if `p'educ_cat == 9
    replace `p'educ_yrs = 14 if inlist(`p'educ_cat, 5, 6, 10)
    replace `p'educ_yrs = 16 if inlist(`p'educ_cat, 7, 11)
    replace `p'educ_yrs = 20 if `p'educ_cat == 12
}
label variable faeduc_yrs "father's education in years"
label variable maeduc_yrs "mother's education in years"

* ISCED code. Categories 13 and 99 have no rule and stay missing.
foreach p in fa ma {
    generate `p'educ_ISCED = 100 if inlist(`p'educ_cat, 1, 2)
    replace `p'educ_ISCED = 244 if `p'educ_cat == 8
    replace `p'educ_ISCED = 300 if inlist(`p'educ_cat, 3, 4, 5)
    replace `p'educ_ISCED = 344 if `p'educ_cat == 9
    replace `p'educ_ISCED = 500 if inlist(`p'educ_cat, 6, 10)
    replace `p'educ_ISCED = 600 if `p'educ_cat == 7
    replace `p'educ_ISCED = 665 if `p'educ_cat == 11
    replace `p'educ_ISCED = 767 if `p'educ_cat == 12
}
label variable faeduc_ISCED "father highest education in ISCED code"
label variable maeduc_ISCED "mother highest education in ISCED code"

** 14. Homogenising sibling **
** Adds cutoff flags for the sibling counts and turns the don't-know codes
** of the sibling counts into missing values.

//cutoff
// every flag is the constant 99, labelled "no cutoff"
ge nsibs_flag=99
lab var nsibs_flag "cutoff of total number of siblings"
ge nsis_flag=99
lab var nsis_flag "cutoff of number of sisters"
ge nbro_flag=99
lab var nbro_flag "cutoff of number of brothers"

lab def nsib_flag 99 "no cutoff"
lab val nsis_flag nbro_flag nsibs_flag nsib_flag

//recode missing
// nbro and nsis are sums of two components in which don't know is coded 999
// (section 05), so any sum of 999 or more contains at least one don't-know
// component. FIX: the earlier test matched only the exact sums 1998 (and
// 1000 for nbro), leaving sums such as 999+2=1001 in the data as if they
// were valid counts. Already-missing values are excluded explicitly because
// Stata treats missing as larger than any number.
// NOTE(review): assumes valid component counts never come near 999.
replace nsis=. if nsis>=999 & !missing(nsis)
replace nbro=. if nbro>=999 & !missing(nbro)
// nsibs is q09f-1, so its don't-know code is 98
replace nsibs=. if nsibs==98

//number of brothers and sisters not available


** 15. Tab Education and Sibling Variables **
** One-way frequency tables of the demographic, education and sibling
** variables as a final check before saving.
tab1 sex age birthyr
tab1 educ_cat educ_yrs
tab1 faeduc_cat faeduc_yrs
tab1 maeduc_cat maeduc_yrs
tab1 faeduc_flag
tab1 nsibs nsibs_flag


** 16. Save the Data File **

* TODO: replace the placeholder comment with the output file path.
saveold /*insert you work directory here*/, replace
